clear all

*Define global path for replication package
global path "~/Dropbox/IT_Revolution/Replication_package/JPE submission"

global path_rawdata "$path/Raw_data"
global path_cleandata "$path/Clean_data"
global path_output "$path/Output"

* Step 1: Prepare data on European employment
********************************************************************************
*Data cleaning
import delimited "$path_rawdata/lfsa_egais_1_Data.csv", clear

replace age = "1" if age == "From 15 to 39 years"
replace age = "2" if age == "From 40 to 64 years"
destring age, replace

*select sample of males
keep if sex == "Males"

*select countries in GMS
replace geo = "Germany" if geo == "Germany (until 1990 former territory of the FRG)"

keep if geo == "Austria" | geo == "Belgium" | geo == "Denmark" | geo == "Finland" |		///
		geo == "France" | geo == "Germany" | geo == "Greece" | geo == "Ireland" |		///
		geo == "Italy" | geo == "Luxembourg" | geo == "Netherlands" | geo == "Norway" |	///
		geo == "Portugal" | geo == "Spain"  | geo == "Sweden" | geo == "United Kingdom"
replace geo = "UK" if geo == "United Kingdom"

replace value = "" if value == ":"
destring value, replace

*ISCO code
gen isco = .
replace isco = 1 if isco08 == "Managers"
replace isco = 2 if isco08 == "Professionals"
replace isco = 3 if isco08 == "Technicians and associate professionals"
replace isco = 4 if isco08 == "Clerical support workers"
replace isco = 5 if isco08 == "Service and sales workers"
replace isco = 6 if isco08 == "Skilled agricultural, forestry and fishery workers"
replace isco = 7 if isco08 == "Craft and related trades workers"
replace isco = 8 if isco08 == "Plant and machine operators and assemblers"
replace isco = 9 if isco08 == "Elementary occupations" 
drop if isco == .

*adjust occupations
gen wage_rank_GMS = .
replace wage_rank_GMS = 3 if isco08 == "Professionals" | isco08 == "Managers" | isco08 == "Technicians and associate professionals"
replace wage_rank_GMS = 2 if isco08 == "Clerical support workers" | isco08 == "Craft and related trades workers" | isco08 == "Plant and machine operators and assemblers"
replace wage_rank_GMS = 1 if isco08 == "Service and sales workers" | isco08 == "Elementary occupations" 
replace wage_rank_GMS = 0 if wage_rank_GMS == .

*analytical Ranking
gen analy_high = 0
replace analy_high = 1 if isco08 == "Managers"
replace analy_high = 1 if isco08 == "Professionals"
replace analy_high = 1 if isco08 == "Technicians and associate professionals"

save "$path_cleandata/data_clean.dta", replace

* Employment by age
use "$path_cleandata/data_clean.dta", clear

*compute employment by occupation ranking
collapse (sum) value, by(age analy_high geo time)

gen year = .
replace year = 1 if time == 1997
replace year = 2 if time == 2017
drop if year == .
 
keep age year geo analy_hig value
reshape wide value, i(age geo analy_high) j(year)

*compute employment change in top occupation ranking
egen age_emp1 = sum(value1), by(geo age)
egen age_emp2 = sum(value2), by(geo age)

keep if analy_high == 1
drop analy_high

gen rel_emp1 = value1 / age_emp1
gen rel_emp2 = value2 / age_emp2

*compute change in employment by occupation ranking
gen logchange_emp = ln(rel_emp2) - ln(rel_emp1)

keep age geo logchange_emp

reshape wide logchange_emp, i(geo) j(age)

save "$path_cleandata/emp_change_age.dta", replace

* Employment (aggregate)
use "$path_cleandata/data_clean.dta", clear

*compute employment by occupation ranking
collapse (sum) value, by(analy_high geo time)

gen year = .
replace year = 1 if time == 1997
replace year = 2 if time == 2017
drop if year == .
 
keep year geo analy_high value
reshape wide value, i(geo analy_high) j(year)

collapse (sum) value*, by(geo analy_high)

egen age_emp1 = sum(value1), by(geo)
egen age_emp2 = sum(value2), by(geo)

gen rel_emp1 = value1 / age_emp1
gen rel_emp2 = value2 / age_emp2

keep if analy_high == 1
drop analy_high

*compute change in employment by occupation ranking
gen logchange_emp0 = ln(rel_emp2) - ln(rel_emp1)

keep geo logchange_emp

save "$path_cleandata/emp_change_total.dta", replace

*Merge data
use "$path_cleandata/emp_change_total.dta", clear
merge 1:1 geo using "$path_cleandata/emp_change_age.dta"
drop _merge

gen country_abr = ""
replace country_abr = "AUS" if geo == "Austria"
replace country_abr = "BEL" if geo == "Belgium"
replace country_abr = "DNK" if geo == "Denmark"
replace country_abr = "FIN" if geo == "Finland"
replace country_abr = "FRA" if geo == "France"
replace country_abr = "DEU" if geo == "Germany"
replace country_abr = "GRC" if geo == "Greece"
replace country_abr = "IRL" if geo == "Ireland"
replace country_abr = "ITA" if geo == "Italy"
replace country_abr = "LUX" if geo == "Luxembourg"
replace country_abr = "NLD" if geo == "Netherlands"
replace country_abr = "NOR" if geo == "Norway"
replace country_abr = "PRT" if geo == "Portugal"
replace country_abr = "ESP" if geo == "Spain"
replace country_abr = "SWE" if geo == "Sweden"
replace country_abr = "GRB" if geo == "UK"
drop geo

sort country_abr 

save "$path_cleandata/data_europe.dta", replace

* Step 2: Prepare data on employment in Cananda
********************************************************************************
use "$path_rawdata/data_ipums_can.dta", clear

*select sample
keep if age > 15 & age < 64
keep if sex == 1
keep if occisco < 10

*generate data by age_group-occ_group-country
gen young = (age < 40)
gen analy_high = (occisco <= 3)
gen emp = 1

collapse (sum) emp [aw=perwt], by(year country young analy_high) fast

egen tot_emp_age = sum(emp), by(year country young)
egen tot_emp_analy = sum(emp), by(year country analy_high)
egen tot_emp_country = sum(emp), by(year country)

gen rel_emp = emp / tot_emp_age
gen rel_emp0 = tot_emp_analy / tot_emp_country

keep if analy_hig == 1
replace young  = 2 if young == 0

keep country year young rel_emp*
reshape wide rel_emp, i(country year) j(young)

gen period = .
replace period = 0 if year == 2001 
replace period = 1 if year == 2011 
drop if period == .
drop year

reshape wide rel_emp1 rel_emp2 rel_emp0, i(country) j(period)

foreach N of numlist 0/2 {
	gen logchange_emp`N' = ln( rel_emp`N'1) - ln(rel_emp`N'0) 
	rename rel_emp`N'0 rel_emp`N'
}

gen country_abr = "CAN"
drop country

keep country_abr logchange_emp*
order country_abr logchange_emp2 logchange_emp0 logchange_emp1

save "$path_cleandata/data_CAN.dta", replace

* Step 3: Produce data for the graph
********************************************************************************
use "$path_cleandata/data_CAN.dta", clear
append using "$path_cleandata/data_europe.dta"

sort country_abr

gen n_yr = 20
replace n_yr=10 if country_abr=="CAN"

foreach V of varlist logchange_emp2 logchange_emp0 logchange_emp1 {
	replace `V' = `V'/n_yr
}

drop n_yr
sort logchange_emp0

export excel "$path_output/dataFigA4.xlsx", cell(A1) firstrow(variables) sheet("data") replace


*Erase temp files
erase "$path_cleandata/emp_change_total.dta"
erase "$path_cleandata/emp_change_age.dta"
erase "$path_cleandata/data_clean.dta"
erase "$path_cleandata/data_CAN.dta"
erase "$path_cleandata/data_europe.dta"
